******************************************************************************
*										
*                           Boese & Eberhardt							
*            
*                          Facets of Democracy
*													
******************************************************************************
*                            Data Construction
******************************************************************************
*                           Created: 11th March 2021					
*                      Last Changed: 6th May 2021
******************************************************************************
*                              Markus Eberhardt						
******************************************************************************
*                 School of Economics, University of Nottingham,			
*                  University Park, Nottingham NG7 2RD, England			
*                     email: markus.eberhardt@nottingham.ac.uk				
*                   web: https://sites.google.com/site/medevecon/			
******************************************************************************

clear all
set matsize 11000

global otherpath "/Users/lezme/Dropbox"
global path "/Users/lezme/Dropbox/Facets of Democracy"
global rawpath "/Users/lezme/Dropbox"

global otherpath "D:/Dropbox"
global path "D:/Dropbox/Facets of Democracy"
global rawpath "D:/Dropbox"


global data "$path/Data"
global otherdata "$path/Data"
global do "$path/Do_files"

global texfolder "/Users/lezme/Dropbox/Apps/Overleaf/Facets of Democracy"

****
**** ERT
****

import excel "$data/ERT/Episodes of Regime Transformation.xlsx", sheet("data") firstrow clear

drop v2x_regime  v2x_polyarchy v2x_polyarchy_codelow v2x_polyarchy_codehigh
* Pick these from V-Dem v11

label var	country_id		"ERT ID"
label var	wbcode			"Country code"
label var	country_name	"Country name"
label var	year			"Year"
label var	reg_start_year	"Start of Current regime passing ROW threshold (ish)"
label var	reg_end_year	"End of Current regime passing ROW threshold (ish)"
label var	reg_id			"Regime ID"
label var	reg_type		"Current regime: Autocracy or Democracy"
label var	reg_trans		"Final year of regime: -1 dem breakdown, 1 dem transition"
label var	founding_elec	"Year first election held under democracy"
label var	row_regch_event	"Regime change according to ROW alone"
label var	row_regch_censored	"Regime change ROW but within tolerance period"
label var	dem_ep			"Ongoing democratisation episode [0,1]"
label var	dem_ep_id		"Ongoing democratisation episode ID"
label var	dem_ep_start_year "Ongoing democratisation episode start year"
label var	dem_ep_end_year	"Ongoing democratisation episode end year"
label var	dem_pre_ep_year	"Dummy for democratisation episode in the following year"
label var	dem_ep_termition "Why did the democratisation episode end? [0-4]"	
label var	sub_dem_ep		"What subtype of dem ep: dem deepening or liberal autoc"
label var	sub_dem_ep_id	"Dem subtype ID"
label var	sub_dem_ep_start_year	"Democratic episode subtype start year"
label var	sub_dem_ep_end_year		"Democratic episode subtype end year"
label var	dem_ep_outcome		"Democratic episode subtype outcome"
label var	dem_ep_censored		"Democratic episode subtype censored"
label var	aut_ep			"Ongoing autocratisation episode [0,1]"
label var	aut_ep_id		"Ongoing autocratisation episode ID"
label var	aut_ep_start_year	"Ongoing autocratisation episode start year"
label var	aut_ep_end_year	"Ongoing autocratisation episode end year"	
label var	aut_pre_ep_year	"Dummy for autocratisation episode in the following year"
label var	aut_ep_termition	"Why did the autocratisation episode end? [0-4]"
label var	sub_aut_ep		"What subtype of auto ep: dem or auto regression"
label var	sub_aut_ep_id	"Auto subtype ID"
label var	sub_aut_ep_start_year	"Autocratisation episode subtype start year"
label var	sub_aut_ep_end_year	"Autocratisation episode subtype end year"
label var	aut_ep_outcome	"Auocratisation episode subtype outcome"
label var	aut_ep_censored	"Auocratisation episode subtype censored"

* Full sample: 427 episodes leading to 227 regime changes and 200 cases without regime change.
* The 227 regime changes are split into 74 shifts to autocracy and 153 shifts to democracy.

/*
								ROW
ERT        |        -1          0          1 |     Total
-----------+---------------------------------+----------
        -1 |        74          0          0 |        74 
         0 |        53     18,118         50 |    18,221 
         1 |         0          0        153 |       153 
-----------+---------------------------------+----------
     Total |       127     18,118        203 |    18,448 
	 
ROW has 50 as democratic regime change which are not included in ERT 
ROW has 53 as autocratic regime change which are not included in ERT 

*/

sort wbcode year
save "$data/ert.dta", replace


****
**** V-Dem
****

use "$data/VDem/v-dem v11/V-Dem-CY-Core-v11.dta", clear
*keep if year>1900
rename country_text_id wbcode 
keep country_name-year v2x_regime v2x_libdem v2x_regime_amb v2x_freexp v2x_frassoc_thick v2xel_frefair v2x_elecoff v2x_suffr v2elfrfair v2x_liberal v2xcl_rol v2x_jucon v2xlg_legcon v2x_rule v2x_freexp_altinf v2x_polyarchy  v2clprptym v2clprptyw
sort wbcode year
save "$otherdata/VDem/vdem_clean11.dta", replace


****
**** Boix et al
****

use "$otherdata/BMR/democracy-v3.0.dta", clear
drop if year<1950
sort abbreviation year
by abbreviation: gen c=1 if year==year[_n-1]
drop if c==1
drop if abbreviation=="NA"
keep abbreviation year democracy-democracy_femalesuffrage
rename abbreviation wbcode
label var democracy "Dichotomous democracy measure (Boix et al)"
rename democracy dem_boix
local z "_trans _breakdowns _duration _omitteddata _femalesuffrage"
foreach k of local z{
	rename democracy`k' dem_boix`k'
}
sort wbcode year
save "$data/BMR/boix_clean3.dta", replace



****
****  Polity IV
****

import excel "$data/PolityIV/p4v2018.xls", sheet("p4v2017") firstrow clear
drop cyear ccode scode
run "$do/A_wbcode.do"
tab country if missing(wbcode)
sort wbcode year
by wbcode: gen c=1 if year==year[_n-1]
tab wbcode year if c==1
order c
drop if c==1
drop if missing(wbcode)
encode wbcode, gen(nwbcode)
tsset nwbcode year
xtreg polity2, fe
drop nwbcode
sort wbcode year
save "$data/polityIV_clean.dta", replace


****
**** DOTS 1948-2019
****

* Trade (goods exports fob and goods imports cif in millions US$)

insheet using "$data/dots1948_2019_exp_fob.csv", names comma clear
reshape long exports_fob, i(country) j(year)
drop if missing(country)
sort country year
save "$data/dots1948_2019_exp_fob.dta", replace

insheet using "$data/dots1948_2019_imp_cif.csv", names comma clear
reshape long imports_cif, i(country) j(year)
drop if missing(country)
sort country year
save "$data/dots1948_2019_imp_cif.dta", replace

merge country year using "$data/dots1948_2019_exp_fob.dta", sort
drop if _merge==2
drop _merge


run "$do/A_wbcode.do"
tab country if missing(wbcode)

replace wbcode="ZAR" if country=="Congo, Democratic Republic of the"
replace wbcode="CIV" if _n>=3817 & _n<=3888
replace wbcode="PRK" if country=="Korea, Democratic People's Republic"
replace wbcode="KXK" if country=="Kosovo"
replace wbcode="STP" if _n>=13609 & _n<=13680
drop if country=="USSR" & year<1980
drop if country=="USSR" & year>1991
drop if country=="Russian Federation" & year>=1980 & year<=1991
sort country year

drop if country=="Yemen"
drop if country=="Yemen, People's Dem. Rep. of"
replace wbcode="YEM" if country=="Yemen, Rep. of"
drop if wbcode=="YUG"

sort wbcode year 
drop if missing(wbcode)
gen ex_trade = exports_fob/(exports_fob+imports_cif)
save "$data/dots1948_2019.dta", replace


****
**** Maddison/Bolt et al (2020)
****

use "$data/Historical data/mpd2020.dta", clear
rename countrycode wbcode
rename country country_mad

****
**** Merge GDPpc and ERT data and V-Dem data and DOTS data
****
merge wbcode year using "$data/ert.dta", sort
drop _merge
gen MADDISON = ""
label var MADDISON "*************************************************"
gen VDEM_ERT = ""
label var VDEM_ERT "*************************************************"
order wbcode year country_mad country_id country_name MADDISON ///
	gdppc pop VDEM_ERT

merge wbcode year using "$otherdata/VDem/vdem_clean11.dta", sort

gen VDEM_VARS = ""
label var VDEM_VARS "*************************************************"
drop _merge

merge wbcode year using "$data/dots1948_2019.dta", sort
gen DOTS_VARS = ""
label var DOTS_VARS "*************************************************"
drop _merge 

merge wbcode year using "$data/polityIV_clean.dta", sort
gen POLITY_IV = ""
label var POLITY_IV "*************************************************"

sort wbcode year
encode wbcode, gen(nwbcode)
tsset nwbcode year

order nwbcode wbcode-aut_ep_censored VDEM_VARS v2x_polyarchy-v2x_freexp DOTS_VARS country imports_cif exports_fob ex_trade POLITY_IV
drop _merge c

merge wbcode year using	"$data/BMR/boix_clean3.dta", sort 
drop _merge 

gen DEM_ANRR = ""
label var DEM_ANRR "*************************************************"
order 	nwbcode-bornyear
drop dem_ANRRa 
gen BOIX_ET_AL=""
label var BOIX_ET_AL "*************************************************"
gen CHEIBUB_ET_AL=""
label var CHEIBUB_ET_AL "*************************************************"

gen REGRESSION_VARS = ""
label var REGRESSION_VARS "*************************************************"
order nwbcode wbcode-regtrans BOIX dem_boix-dem_boix_femalesuffrage REGRESSION_VARS
drop _merge* 



xtreg gdppc reg_trans dem_ep v2x_freexp ex_trade, fe
* n=8,573, N=157, avg T=54.6 (min 13, max 71); 1948-2018

gen c=1 if e(sample)
sort nwbcode
by nwbcode: egen csum=sum(c) if e(sample)

drop if missing(csum)
sort wbcode year
tab csum v2x_regime
order nwbcode wbcode year csum
sort nwbcode year
drop c 
save "$data/madd_ert_vdem_2021.dta", replace
